package com.chance.cc.crawler.prod.command.job.domain.news.ocn.module;

import com.alibaba.fastjson.JSON;
import com.chance.cc.crawler.core.CrawlerEnum;
import com.chance.cc.crawler.core.CrawlerJob;
import com.chance.cc.crawler.core.downloader.HttpConfig;
import com.chance.cc.crawler.core.downloader.HttpPage;
import com.chance.cc.crawler.core.filter.FilterInfo;
import com.chance.cc.crawler.core.filter.FilterUtils;
import com.chance.cc.crawler.core.record.CrawlerRequestRecord;
import com.chance.cc.crawler.meta.core.bean.CrawlerMetaConstant;
import com.chance.cc.crawler.meta.core.bean.job.CrawlerScheduleJob;
import com.chance.cc.crawler.prod.command.job.domain.news.NewsCommonScript;
import org.apache.commons.lang3.StringUtils;

import static com.chance.cc.crawler.core.CrawlerEnum.CrawlerRecordFilter.count;
import static com.chance.cc.crawler.core.CrawlerEnum.CrawlerRecordFilter.dateRange;
import static com.chance.cc.crawler.core.CrawlerEnum.CrawlerRequestType.turnPage;

/**
 * Schedule-job definition for the "chanjing" (产经) section of ocn.com.cn.
 *
 * <p>{@link #crawlerSchedulejob()} assembles the {@link CrawlerJob}; 
 * {@link #publishCrawlerScheduleJobInfo()} wraps it in a {@link CrawlerScheduleJob}
 * and submits it through {@code metaServiceCommand}.
 *
 * <p>NOTE(review): {@code metaServiceCommand} and {@code kafkaTopic} are not declared
 * in this file; presumably they are inherited from {@link NewsCommonScript} — confirm.
 *
 * @Author Zhao.Hhuan
 * @Date Create in 2021/6/21 15:42
 */
public class OcnChanJingTraceCrawlerSchedulerJob extends NewsCommonScript {
    /** Domain identifier used for the job, its HTTP config and its biz tags. */
    public static final String domain = "ocn";
    /** Site identifier used in the biz tags, the trigger job name and the queue name. */
    public static final String site = "module_chanjing";

    /**
     * Prints the trigger key generated for the job built by {@link #crawlerSchedulejob()}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // To publish the schedule job instead, call: publishCrawlerScheduleJobInfo();
        CrawlerJob job = crawlerSchedulejob();
        System.out.println(job.generateCrawlerTriggerKey());
    }

    /**
     * Builds the crawler job, wraps it in a {@link CrawlerScheduleJob} and submits it
     * via {@code metaServiceCommand.addOrUpdateCrawlerScheduleJob}, printing the raw
     * text of the returned page.
     *
     * @return the crawler job that was submitted
     */
    public static CrawlerJob publishCrawlerScheduleJobInfo() {
        CrawlerJob job = crawlerSchedulejob();

        // Serialize first, before the crawler key is generated, to keep the call order stable.
        String jobJson = JSON.toJSONString(job);

        // Describe the scheduled crawl job to be published.
        CrawlerScheduleJob scheduleJob = new CrawlerScheduleJob();
        scheduleJob.setDomain(domain);
        scheduleJob.setCrawlerJob(jobJson);
        scheduleJob.setJobType(CrawlerMetaConstant.ScheduleCrawlerJobType.crawler.enumVal());
        scheduleJob.setNote("中投网产经板块回溯七天文章");
        scheduleJob.setCrawlerKey(job.generateCrawlerKey());

        HttpPage response = metaServiceCommand.addOrUpdateCrawlerScheduleJob(scheduleJob);
        String message = "发布crawler作业：" + response.getRawText();
        System.out.println(message);

        return job;
    }

    /**
     * Assembles the crawler job for the "chanjing" news list: a turn-page start request
     * tagged with domain/site/siteBiz, a cron trigger, a redis request queue and a
     * kafka result pipeline.
     *
     * @return the fully built crawler job
     */
    public static CrawlerJob crawlerSchedulejob() {
        final String siteBiz = "news-trace";
        final String startUrl = "http://www.ocn.com.cn/chanjing/idynews/1";
        // Presumably hours: seven days plus five hours of slack — confirm against FilterUtils.
        final int traceWindow = 24 * 7 + 5;

        // Count-based filter attached to page turning (current count 0, total 5).
        FilterInfo turnPageLimit = new FilterInfo();
        turnPageLimit.setFilter(count);
        turnPageLimit.setCurCount(0);
        turnPageLimit.setCountTotalNum(5);

        CrawlerRequestRecord seedRecord = CrawlerRequestRecord.builder()
                .startPageRequest(domain, turnPage)
                .domain(domain)
                .httpUrl(startUrl)
                .httpConfig(HttpConfig.me(domain))
                .filter(dateRange)
                .turnPageFilterInfo(turnPageLimit)
                .addFilterInfo(FilterUtils.dateRangeFilterInfo(traceWindow, null))
                .releaseTime(System.currentTimeMillis())
                .resultLabelTag(CrawlerEnum.CrawlerDataType.article)
                // A proxy can be attached here with .proxy(proxy) if needed.
                .build();
        seedRecord.tagsCreator().bizTags().addDomain(domain);
        seedRecord.tagsCreator().bizTags().addSite(site);
        seedRecord.tagsCreator().bizTags().addSiteBiz(siteBiz);

        String triggerJobName = StringUtils.joinWith(
                "-", site, siteBiz, CrawlerMetaConstant.ScheduleJobTriggerJob_Realtime);
        String queueName = StringUtils.joinWith(
                "-", "crawler", domain, site, siteBiz, "queue");

        return CrawlerJob.builder()
                .crawlerJobThreadNumber(5)
                .triggerInfo(
                        domain,
                        CrawlerMetaConstant.ScheduleJobTrigger_Cron,
                        System.currentTimeMillis(),
                        triggerJobName)
                .crawlerRequestQueue(CrawlerMetaConstant.redisRequestQueue(queueName))
                // Alternative sink for local debugging:
                // .fileResultPipeline("kafka", "/data/chance_crawler_runner/logs/node/ocn.log", false)
                .kafkaResultPipeline("kafka", kafkaTopic, null)
                .requestRecord(seedRecord)
                .build();
    }
}
